home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Skunkware 5
/
Skunkware 5.iso
/
src
/
Tools
/
mkid
/
mkid.c
< prev
next >
Wrap
C/C++ Source or Header
|
1995-05-03
|
17KB
|
748 lines
static char copyright[] = "@(#)Copyright (c) 1986, Greg McGary";
static char sccsid[] = "@(#)mkid.c 1.4 86/11/06";
#include <bool.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <id.h>
#include <bitops.h>
#include <errno.h>
#include <extern.h>
int idnHashCmp();
int idnQsortCmp();
int round2();
struct idname *newIdName();
void extractId();
void fileIdArgs();
void initHashTable();
void oldIdArgs();
void rehash();
void updateID();
void writeID();
long NameCount; /* Count of names in database */
long NumberCount; /* Count of numbers in database */
long StringCount; /* Count of strings in database */
long SoloCount; /* Count of identifiers that occur only once */
long HashSize; /* Total Slots in hash table */
long HashMaxLoad; /* Maximum loading of hash table */
long HashFill; /* Number of keys inserted in table */
long HashProbes; /* Total number of probes */
long HashSearches; /* Total number of searches */
struct idname **HashTable; /* Vector of idname pointers */
bool Verbose = FALSE;
int ArgsCount = 0; /* Count of args to save */
int ScanCount = 0; /* Count of files to scan */
int PathCount = 0; /* Count of files covered in database */
int BitArraySize; /* Size of bit array slice (per name) */
char *MyName;
static void
usage()
{
fprintf(stderr, "Usage: %s [-f<idfile>] [-s<dir>] [-r<dir>] [(+|-)l[<lang>]] [-v] [(+|-)S<scanarg>] [-a<argfile>] [-] [-u] [files...]\n", MyName);
exit(1);
}
main(argc, argv)
int argc;
char **argv;
{
char *arg;
int op;
FILE *argFILE = NULL;
char *idFile = IDFILE;
char *rcsDir = NULL;
char *sccsDir = NULL;
struct idarg *idArgs, *idArgHead;
bool keepLang = FALSE;
int argsFrom = 0;
#define AF_CMDLINE 0x1 /* file args came on command line */
#define AF_FILE 0x2 /* file args came from a file (-f<file>) */
#define AF_IDFILE 0x4 /* file args came from an old ID file (-u) */
#define AF_QUERY 0x8 /* no file args necessary: usage query */
MyName = basename(GETARG(argc, argv));
#ifdef ERRLINEBUF
setlinebuf(stderr);
#endif
idArgs = idArgHead = NEW(struct idarg);
/*
Process some arguments, and snarf-up some
others for processing later.
*/
while (argc) {
arg = GETARG(argc, argv);
if (*arg != '-' && *arg != '+') {
argsFrom |= AF_CMDLINE;
idArgs->ida_arg = arg;
idArgs->ida_flags = IDA_SCAN|IDA_PATH;
idArgs->ida_index = postIncr(&PathCount);
ScanCount++;
idArgs = (idArgs->ida_next = NEW(struct idarg));
continue;
}
op = *arg++;
switch (*arg++)
{
case 'u':
argsFrom |= AF_IDFILE;
oldIdArgs(idFile, &idArgs);
break;
case '\0':
argsFrom |= AF_FILE;
fileIdArgs(stdin, &idArgs);
break;
case 'a':
if ((argFILE = fopen(arg, "r")) == NULL) {
filerr("open", arg);
exit(1);
}
argsFrom |= AF_FILE;
fileIdArgs(argFILE, &idArgs);
break;
case 'f':
idFile = arg;
break;
case 'v':
Verbose = TRUE;
break;
case 'S':
if (strchr(&arg[-2], '?')) {
setScanArgs(op, arg);
argsFrom |= AF_QUERY;
}
/*FALLTHROUGH*/
case 'l':
case 's':
case 'r':
idArgs->ida_arg = &arg[-2];
idArgs->ida_index = -1;
idArgs->ida_flags = IDA_ARG;
idArgs = (idArgs->ida_next = NEW(struct idarg));
ArgsCount++;
break;
default:
usage();
}
}
if (argsFrom & AF_QUERY)
exit(0);
/*
File args should only come from one place. Ding the
user if arguments came from multiple places, or if none
were supplied at all.
*/
switch (argsFrom)
{
case AF_CMDLINE:
case AF_FILE:
case AF_IDFILE:
if (PathCount > 0)
break;
/*FALLTHROUGH*/
case 0:
fprintf(stderr, "%s: Use -u, -f<file>, or cmd-line for file args!\n", MyName);
usage();
default:
fprintf(stderr, "%s: Use only one of: -u, -f<file>, or cmd-line for file args!\n", MyName);
usage();
}
if (ScanCount == 0)
exit(0);
BitArraySize = (PathCount + 7) >> 3;
initHashTable(ScanCount);
if (access(idFile, 06) < 0
&& (errno != ENOENT || access(dirname(idFile), 06) < 0)) {
filerr("modify", idFile);
exit(1);
}
for (idArgs = idArgHead; idArgs->ida_next; idArgs = idArgs->ida_next) {
char *(*scanner)();
FILE *srcFILE;
char *arg, *lang, *suff;
lang = NULL;
arg = idArgs->ida_arg;
if (idArgs->ida_flags & IDA_ARG) {
op = *arg++;
switch (*arg++)
{
case 'l':
if (*arg == '\0') {
keepLang = FALSE;
lang = NULL;
break;
}
if (op == '+')
keepLang = TRUE;
lang = arg;
break;
case 's':
sccsDir = arg;
break;
case 'r':
rcsDir = arg;
break;
case 'S':
setScanArgs(op, strsav(arg));
break;
default:
usage();
}
continue;
}
if (!(idArgs->ida_flags & IDA_SCAN))
goto skip;
if (lang == NULL) {
if ((suff = strrchr(arg, '.')) == NULL)
suff = "";
if ((lang = getLanguage(suff)) == NULL) {
fprintf(stderr, "%s: No language assigned to suffix: `%s'\n", MyName, suff);
goto skip;
}
}
if ((scanner = getScanner(lang)) == NULL) {
fprintf(stderr, "%s: No scanner for language: `%s'\n", MyName, lang);
goto skip;
}
if ((srcFILE = openSrcFILE(arg, sccsDir, rcsDir)) == NULL)
goto skip;
if (Verbose)
fprintf(stderr, "%s: %s\n", lang, arg);
extractId(scanner, srcFILE, idArgs->ida_index);
fclose(srcFILE);
skip:
if (!keepLang)
lang = NULL;
}
if (HashFill == 0)
exit(0);
if (Verbose)
fprintf(stderr, "Compressing Hash Table...\n");
hashCompress(HashTable, HashSize);
if (Verbose)
fprintf(stderr, "Sorting Hash Table...\n");
qsort(HashTable, HashFill, sizeof(struct idname *), idnQsortCmp);
if (argsFrom == AF_IDFILE) {
if (Verbose)
fprintf(stderr, "Merging Tables...\n");
updateID(idFile, idArgHead);
}
if (Verbose)
fprintf(stderr, "Writing `%s'...\n", idFile);
writeID(idFile, idArgHead);
if (Verbose) {
float loadFactor = (float)HashFill / (float)HashSize;
float aveProbes = (float)HashProbes / (float)HashSearches;
float aveOccur = (float)HashSearches / (float)HashFill;
fprintf(stderr, "Names: %ld, ", NameCount);
fprintf(stderr, "Numbers: %ld, ", NumberCount);
fprintf(stderr, "Strings: %ld, ", StringCount);
fprintf(stderr, "Solo: %ld, ", SoloCount);
fprintf(stderr, "Total: %ld\n", HashFill);
fprintf(stderr, "Occurances: %.2f, ", aveOccur);
fprintf(stderr, "Load: %.2f, ", loadFactor);
fprintf(stderr, "Probes: %.2f\n", aveProbes);
}
exit(0);
}
void
extractId(getId, srcFILE, index)
register char *(*getId)();
register FILE *srcFILE;
int index;
{
register struct idname **slot;
register char *key;
int flags;
while ((key = (*getId)(srcFILE, &flags)) != NULL) {
slot = (struct idname **)hashSearch(key, HashTable, HashSize, sizeof(struct idname *), h1str, h2str, idnHashCmp, &HashProbes);
HashSearches++;
if (*slot != NULL) {
(*slot)->idn_flags |= flags;
BITSET((*slot)->idn_bitv, index);
continue;
}
*slot = newIdName(key);
(*slot)->idn_flags = IDN_SOLO|flags;
BITSET((*slot)->idn_bitv, index);
if (HashFill++ >= HashMaxLoad)
rehash();
}
}
void
writeID(idFile, idArgs)
char *idFile;
struct idarg *idArgs;
{
register struct idname **idnp;
register struct idname *idn;
register int i;
char *vecBuf;
FILE *idFILE;
int count;
int lasti;
long before, after;
int length, longest;
struct idhead idh;
if ((idFILE = fopen(idFile, "w+")) == NULL) {
filerr("create", idFile);
exit(1);
}
fseek(idFILE, (long)sizeof(struct idhead), 0);
/* write out the list of pathnames */
idh.idh_argo = ftell(idFILE);
for (i = lasti = 0; idArgs->ida_next; idArgs = idArgs->ida_next) {
if (idArgs->ida_index > 0)
while (++lasti < idArgs->ida_index)
i++, putc('\0', idFILE);
fputs(idArgs->ida_arg, idFILE);
i++, putc('\0', idFILE);
}
idh.idh_argc = i;
idh.idh_pthc = PathCount;
/* write out the list of identifiers */
i = 1;
if (idh.idh_pthc >= 0x000000ff)
i++;
if (idh.idh_pthc >= 0x0000ffff)
i++;
if (idh.idh_pthc >= 0x00ffffff)
i++;
idh.idh_vecc = i;
vecBuf = malloc((idh.idh_pthc + 1) * idh.idh_vecc);
putc('\377', idFILE);
before = idh.idh_namo = ftell(idFILE);
longest = 0;
for (idnp = HashTable, i = 0; i < HashFill; i++, idnp++) {
idn = *idnp;
if (idn->idn_name[0] == '\0') {
HashFill--; i--;
continue;
}
if (idn->idn_flags & IDN_SOLO)
SoloCount++;
if (idn->idn_flags & IDN_NUMBER)
NumberCount++;
if (idn->idn_flags & IDN_NAME)
NameCount++;
if (idn->idn_flags & IDN_STRING)
StringCount++;
putc((*idnp)->idn_flags, idFILE);
fputs(idn->idn_name, idFILE);
putc('\0', idFILE);
count = bitsToVec(vecBuf, (*idnp)->idn_bitv, idh.idh_pthc, idh.idh_vecc);
fwrite(vecBuf, idh.idh_vecc, count, idFILE);
putc('\377', idFILE);
after = ftell(idFILE);
if ((length = (after - before)) > longest)
longest = length;
before = after;
}
idh.idh_namc = i;
putc('\377', idFILE);
idh.idh_endo = ftell(idFILE);
idh.idh_bsiz = longest;
/* write out the header */
strncpy(idh.idh_magic, IDH_MAGIC, sizeof(idh.idh_magic));
idh.idh_vers = IDH_VERS;
fseek(idFILE, 0L, 0);
fwrite(&idh, sizeof(struct idhead), 1, idFILE);
fclose(idFILE);
}
/*
Build an idarg vector from pathnames contained in an existing
id file. Only include pathnames for files whose modification
time is later than that of the id file itself.
*/
void
oldIdArgs(idFile, idArgsP)
char *idFile;
struct idarg **idArgsP;
{
struct stat statBuf;
struct idhead idh;
FILE *idFILE;
register int i;
register char *strings;
time_t idModTime;
if ((idFILE = fopen(idFile, "r")) == NULL) {
filerr("open", idFile);
usage();
}
/*
* Open the id file, get its mod-time, and read its header.
*/
if (fstat(fileno(idFILE), &statBuf) < 0) {
filerr("stat", idFile);
usage();
}
idModTime = statBuf.st_mtime;
fread(&idh, sizeof(struct idhead), 1, idFILE);
if (!strnequ(idh.idh_magic, IDH_MAGIC, sizeof(idh.idh_magic))) {
fprintf(stderr, "%s: Not an id file: `%s'\n", MyName, idFile);
exit(1);
}
if (idh.idh_vers != IDH_VERS) {
fprintf(stderr, "%s: ID version mismatch (%ld,%ld)\n", MyName, idh.idh_vers, IDH_VERS);
exit(1);
}
/*
* Read in the id pathnames, compare their mod-times with
* the id file, and incorporate the pathnames of recently modified
* files in the idarg vector. Also, construct a mask of
* bit array positions we want to turn off when we build the
* initial hash-table.
*/
fseek(idFILE, idh.idh_argo, 0);
strings = malloc(i = idh.idh_namo - idh.idh_argo);
fread(strings, i, 1, idFILE);
ScanCount = 0;
for (i = 0; i < idh.idh_argc; i++) {
(*idArgsP)->ida_arg = strings;
if (*strings == '+' || *strings == '-') {
(*idArgsP)->ida_flags = IDA_ARG;
(*idArgsP)->ida_index = -1;
} else {
(*idArgsP)->ida_flags = IDA_PATH;
(*idArgsP)->ida_index = postIncr(&PathCount);
if (stat(strings, &statBuf) < 0) {
filerr("stat", strings);
} else if (statBuf.st_mtime >= idModTime) {
(*idArgsP)->ida_flags |= IDA_SCAN;
ScanCount++;
}
}
(*idArgsP) = ((*idArgsP)->ida_next = NEW(struct idarg));
while (*strings++)
;
}
if (ScanCount == 0) {
fclose(idFILE);
exit(0);
}
fclose(idFILE);
}
void
updateID(idFile, idArgs)
char *idFile;
struct idarg *idArgs;
{
struct idname *idn;
struct idhead idh;
register char *bitArray;
char *entry;
register int i;
FILE *idFILE;
int cmp, count, size;
char *bitsOff;
struct idname **newTable, **mergeTable;
struct idname **t1, **t2, **tm;
if ((idFILE = fopen(idFile, "r")) == NULL)
filerr("open", idFile);
fread(&idh, sizeof(struct idhead), 1, idFILE);
entry = malloc(idh.idh_bsiz);
bitsOff = malloc(BitArraySize);
bzero(bitsOff, BitArraySize);
for (i = 0; idArgs->ida_next; idArgs = idArgs->ida_next)
if (idArgs->ida_flags & IDA_SCAN)
BITSET(bitsOff, idArgs->ida_index);
bitArray = malloc(BitArraySize);
bzero(bitArray, BitArraySize);
t2 = newTable = (struct idname **)malloc((idh.idh_namc + 1) * sizeof(struct idname *));
fseek(idFILE, idh.idh_namo, 0);
count = 0;
for (i = 0; i < idh.idh_namc; i++) {
size = 1 + fgets0(entry, idh.idh_bsiz, idFILE);
getsFF(&entry[size], idFILE);
vecToBits(bitArray, &entry[size], idh.idh_vecc);
bitsclr(bitArray, bitsOff, BitArraySize);
if (!bitsany(bitArray, BitArraySize))
continue;
*t2 = newIdName(ID_STRING(entry));
bitsset((*t2)->idn_bitv, bitArray, BitArraySize);
(*t2)->idn_flags = ID_FLAGS(entry);
bzero(bitArray, BitArraySize);
t2++; count++;
}
*t2 = NULL;
t1 = HashTable;
t2 = newTable;
tm = mergeTable = (struct idname **)calloc(HashFill + count + 1, sizeof(struct idname *));
while (*t1 && *t2) {
cmp = strcmp((*t1)->idn_name, (*t2)->idn_name);
if (cmp < 0)
*tm++ = *t1++;
else if (cmp > 0)
*tm++ = *t2++;
else {
(*t1)->idn_flags |= (*t2)->idn_flags;
(*t1)->idn_flags &= ~IDN_SOLO;
bitsset((*t1)->idn_bitv, (*t2)->idn_bitv, BitArraySize);
*tm++ = *t1;
t1++, t2++;
}
}
while (*t1)
*tm++ = *t1++;
while (*t2)
*tm++ = *t2++;
*tm = NULL;
HashTable = mergeTable;
HashFill = tm - mergeTable;
}
/*
Cons up a list of idArgs as supplied in a file.
*/
void
fileIdArgs(argFILE, idArgsP)
FILE *argFILE;
struct idarg **idArgsP;
{
int fileCount;
char buf[BUFSIZ];
char *arg;
fileCount = 0;
while (fgets(buf, sizeof(buf), argFILE)) {
(*idArgsP)->ida_arg = arg = strnsav(buf, strlen(buf)-1);
if (*arg == '+' || *arg == '-') {
(*idArgsP)->ida_flags = IDA_ARG;
(*idArgsP)->ida_index = -1;
} else {
(*idArgsP)->ida_flags = IDA_SCAN|IDA_PATH;
(*idArgsP)->ida_index = postIncr(&PathCount);
ScanCount++;
}
(*idArgsP) = ((*idArgsP)->ida_next = NEW(struct idarg));
}
}
void
initHashTable(pathCount)
int pathCount;
{
if ((HashSize = round2((pathCount << 6) + 511)) > 0x8000)
HashSize = 0x8000;
HashMaxLoad = HashSize - (HashSize >> 4); /* about 94% */
HashTable = (struct idname **)calloc(HashSize, sizeof(struct idname *));
}
/*
Double the size of the hash table in the
event of overflow...
*/
void
rehash()
{
long oldHashSize = HashSize;
struct idname **oldHashTable = HashTable;
register struct idname **htp;
register struct idname **slot;
HashSize *= 2;
if (Verbose)
fprintf(stderr, "Rehashing... (doubling size to %ld)\n", HashSize);
HashMaxLoad = HashSize - (HashSize >> 4);
HashTable = (struct idname **)calloc(HashSize, sizeof(struct idname *));
HashFill = 0;
for (htp = oldHashTable; htp < &oldHashTable[oldHashSize]; htp++) {
if (*htp == NULL)
continue;
slot = (struct idname **)hashSearch((*htp)->idn_name, (char *)HashTable, HashSize, sizeof(struct idname *), h1str, h2str, idnHashCmp, &HashProbes);
if (*slot) {
fprintf(stderr, "%s: Duplicate hash entry!\n");
exit(1);
}
*slot = *htp;
HashSearches++;
HashFill++;
}
free(oldHashTable);
}
/*
Round a given number up to the nearest power of 2.
*/
int
round2(rough)
int rough;
{
int round;
round = 1;
while (rough) {
round <<= 1;
rough >>= 1;
}
return round;
}
/*
`compar' function for hashSearch()
*/
int
idnHashCmp(key, idn)
char *key;
struct idname **idn;
{
int collate;
if (*idn == NULL)
return 0;
if ((collate = strcmp(key, (*idn)->idn_name)) == 0)
(*idn)->idn_flags &= ~IDN_SOLO; /* we found another occurance */
return collate;
}
/*
`compar' function for qsort().
*/
int
idnQsortCmp(idn1, idn2)
struct idname **idn1;
struct idname **idn2;
{
if (*idn1 == *idn2)
return 0;
if (*idn1 == NULL)
return 1;
if (*idn2 == NULL)
return -1;
return strcmp((*idn1)->idn_name, (*idn2)->idn_name);
}
/*
Allocate a new idname struct and fill in the name field.
We allocate memory in large chunks to avoid frequent
calls to malloc() which is a major pig.
*/
struct idname *
newIdName(name)
char *name;
{
register struct idname *idn;
register char *allocp;
register int allocsiz;
static char *allocBuf = NULL;
static char *allocEnd = NULL;
#define ALLOCSIZ (8*1024)
allocsiz = sizeof(struct idname) + strlen(name) + 1 + BitArraySize;
allocsiz += (sizeof(long) - 1);
allocsiz &= ~(sizeof(long) - 1);
allocp = allocBuf;
allocBuf += allocsiz;
if (allocBuf > allocEnd) {
allocBuf = malloc(ALLOCSIZ);
allocEnd = &allocBuf[ALLOCSIZ];
allocp = allocBuf;
allocBuf += allocsiz;
}
idn = (struct idname *)allocp;
allocp += sizeof(struct idname);
idn->idn_bitv = allocp;
for (allocsiz = BitArraySize; allocsiz--; allocp++)
*allocp = '\0';
idn->idn_name = strcpy(allocp, name);
return idn;
}
int
postIncr(ip)
int *ip;
{
register int i;
int save;
save = *ip;
i = save + 1;
if ((i & 0x00ff) == 0x00ff)
i++;
if ((i & 0xff00) == 0xff00) /* This isn't bloody likely */
i += 0x100;
*ip = i;
return save;
}
/*
Move all non-NULL table entries to the front of the table.
return the number of non-NULL elements in the table.
*/
int
hashCompress(table, size)
char **table;
int size;
{
register char **front;
register char **back;
front = &table[-1];
back = &table[size];
for (;;) {
while (*--back == NULL)
;
if (back < front)
break;
while (*++front != NULL)
;
if (back < front)
break;
*front = *back;
}
return (back - table + 1);
}